library(easypackages)
libraries("here","tidyverse","foreign","lmerTest","psych",
"ggeasy","emmeans","patchwork","reshape2")
codepath = here("code")
source(file.path(codepath,"utils.R"))
datapath = here("data")
plotpath = here("plots")
resultpath = here("results")
fontSize = 24
cols2use = c("#619cff","#ffa500")
data_fname = file.path(datapath, "MIRA dataset 10-21-23.sav")
tidy_data = prep_dataset(data_fname)
# Sex x Treatment Type
table(tidy_data$sex,tidy_data$tx)
##
## ASAP LEAP ESDM NDBI NDBI + VF TEACCH EIBI PRT STAR/IMPACT EIBI+PRT DTT
## Female 9 12 74 6 2 19 31 2 9 16 2
## Male 49 47 260 27 4 107 213 18 39 102 11
##
## STAR ABA
## Female 0 0
## Male 5 0
# Responder Status
table(tidy_data$responder15)
##
## Non-Responder Responder
## 102 204
table(tidy_data$responder24)
##
## Non-Responder Responder
## 247 242
# Responder Status x Treatment Type
table(tidy_data$responder15,tidy_data$tx)
##
## ASAP LEAP ESDM NDBI NDBI + VF TEACCH EIBI PRT STAR/IMPACT
## Non-Responder 5 7 41 5 2 12 12 4 5
## Responder 15 2 84 7 3 15 30 2 27
##
## EIBI+PRT DTT STAR ABA
## Non-Responder 6 3 0 0
## Responder 10 8 1 0
table(tidy_data$responder24,tidy_data$tx)
##
## ASAP LEAP ESDM NDBI NDBI + VF TEACCH EIBI PRT STAR/IMPACT
## Non-Responder 18 12 80 11 3 31 36 6 21
## Responder 16 9 92 10 2 23 42 0 23
##
## EIBI+PRT DTT STAR ABA
## Non-Responder 14 13 2 0
## Responder 16 6 3 0
# Responder Status x Sex
table(tidy_data$responder15,tidy_data$sex)
##
## Female Male
## Non-Responder 18 83
## Responder 38 161
table(tidy_data$responder24,tidy_data$sex)
##
## Female Male
## Non-Responder 52 191
## Responder 43 195
# Responder Status x Treatment ESDM vs Other
table(tidy_data$responder15,tidy_data$tx_mvl)
##
## Other ESDM
## Non-Responder 61 41
## Responder 120 84
table(tidy_data$responder24,tidy_data$tx_mvl)
##
## Other ESDM
## Non-Responder 167 80
## Responder 150 92
# Responder Status x Treatment Type (Broad)
table(tidy_data$responder15,tidy_data$tx_broad)
##
## Other EIBI NDBI ESDM
## Non-Responder 19 15 27 41
## Responder 17 38 65 84
table(tidy_data$responder24,tidy_data$tx_broad)
##
## Other EIBI NDBI ESDM
## Non-Responder 43 49 75 80
## Responder 32 48 70 92
The continuous predictors in the model should be scaled (e.g., z-scored) first.
categorical_predictors = c("tx_broad","sex")
# scale predictors as a pre-processing step for the continuous variables that will be modeled
continuous_predictors = c("age_at_start",
"tx_duration",
"intensity",
"pre_imitation",
"pre_vabs_abc",
"pre_ados_css",
# "pretx_dq",
"pretx_verbal_dq",
"pretx_nonverbal_dq")
pcavars = c("age_at_start","tx_duration","intensity",
"pre_imitation",
"pre_vabs_abc",
"pre_ados_css",
"pretx_verbal_dq",
"pretx_nonverbal_dq")
IVs: sex + tx_broad + age_at_start + intensity + pre_imitation + pre_vabs_abc + pre_ados_css + pretx_verbal_dq + pretx_nonverbal_dq
RFX: (1 | dataset)
responder_var = "responder24"
# Responder Status x Treatment Type (Broad)
tmp_tbl = table(tidy_data[,responder_var],tidy_data$tx_broad)
print(tmp_tbl)
##
## Other EIBI NDBI ESDM
## Non-Responder 43 49 75 80
## Responder 32 48 70 92
max2use = max(tmp_tbl)
data2plot = data.frame(t(tmp_tbl))
# cols2use = get_ggColorHue(4)
tx_label2use = "ESDM"
p_esdm = ggplot(data = data2plot[data2plot$Var1==tx_label2use,], aes(x=Var2,y=Freq)) +
geom_bar(stat="identity",aes(fill = Var2)) +
xlab("Responder Type") +
ylab("Sample Size") + ylim(0,max2use) +
scale_fill_manual(values=cols2use) +
scale_x_discrete(labels = c("Non-Advancer", "Advancer")) +
theme_bw() +
guides(fill="none") +
ggtitle(tx_label2use) +
easy_center_title() +
theme(plot.title = element_text(hjust = 0.5, size=fontSize+8),
strip.text.y = element_text(size=fontSize),
axis.text.x = element_text(size=fontSize),
axis.text.y = element_text(size=fontSize-2),
axis.title.x = element_blank(),
axis.title.y = element_text(size=fontSize),
strip.text = element_text(size = fontSize))
tx_label2use = "EIBI"
p_eibi = ggplot(data = data2plot[data2plot$Var1==tx_label2use,], aes(x=Var2,y=Freq)) +
geom_bar(stat="identity",aes(fill = Var2)) +
xlab("Responder Type") +
ylab("Sample Size") + ylim(0,max2use) +
scale_fill_manual(values=cols2use) +
scale_x_discrete(labels = c("Non-Advancer", "Advancer")) +
theme_bw() +
guides(fill="none") +
ggtitle(tx_label2use) +
easy_center_title() +
theme(plot.title = element_text(hjust = 0.5, size=fontSize+8),
strip.text.y = element_text(size=fontSize),
axis.text.x = element_text(size=fontSize),
axis.text.y = element_text(size=fontSize-2),
axis.title.x = element_blank(),
axis.title.y = element_text(size=fontSize),
strip.text = element_text(size = fontSize))
tx_label2use = "NDBI"
p_ndbi = ggplot(data = data2plot[data2plot$Var1==tx_label2use,], aes(x=Var2,y=Freq)) +
geom_bar(stat="identity",aes(fill = Var2)) +
xlab("Responder Type") +
ylab("Sample Size") + ylim(0,max2use) +
scale_fill_manual(values=cols2use) +
scale_x_discrete(labels = c("Non-Advancer", "Advancer")) +
theme_bw() +
guides(fill="none") +
ggtitle(tx_label2use) +
easy_center_title() +
theme(plot.title = element_text(hjust = 0.5, size=fontSize+8),
strip.text.y = element_text(size=fontSize),
axis.text.x = element_text(size=fontSize),
axis.text.y = element_text(size=fontSize-2),
axis.title.x = element_blank(),
axis.title.y = element_text(size=fontSize),
strip.text = element_text(size = fontSize))
tx_label2use = "Other"
p_other = ggplot(data = data2plot[data2plot$Var1==tx_label2use,], aes(x=Var2,y=Freq)) +
geom_bar(stat="identity",aes(fill = Var2)) +
xlab("Responder Type") +
ylab("Sample Size") + ylim(0,max2use) +
scale_fill_manual(values=cols2use) +
scale_x_discrete(labels = c("Non-Advancer", "Advancer")) +
theme_bw() +
guides(fill="none") +
ggtitle("Classroom-Based") +
easy_center_title() +
theme(plot.title = element_text(hjust = 0.5, size=fontSize+8),
strip.text.y = element_text(size=fontSize),
axis.text.x = element_text(size=fontSize),
axis.text.y = element_text(size=fontSize-2),
axis.title.x = element_blank(),
axis.title.y = element_text(size=fontSize),
strip.text = element_text(size = fontSize))
p_final = p_esdm + p_ndbi + p_eibi + p_other + plot_layout(nrow=2,ncol=2)
ggsave(filename = file.path(plotpath, "tx_sample_size_24mo.pdf"), plot = p_final,
width = 16, height=10)
p_final
# Correlation matrix after using only subjects with complete responder18 labels
mask = rowSums(is.na(tidy_data[,c(responder_var,continuous_predictors)]))==0
data2use = tidy_data %>% filter(mask)
corr_mat = cor(data2use[,continuous_predictors], use="pairwise.complete.obs")
dist = as.dist((1-corr_mat)/2)
hc = hclust(dist)
corr_mat = corr_mat[hc$order, hc$order]
data2plot = melt(corr_mat)
# colnames(data2plot)[3] = "r"
p = ggplot(data = data2plot, aes(x = Var1, y = Var2, fill = value, label = round(value,2))) +
geom_tile() +
geom_text() +
labs(x = NULL, y = NULL, fill = "r") +
scale_fill_gradientn(colours = colorRampPalette(c("blue","white","red"))(100),
limits = c(-1,1),
oob = scales::squish) +
scale_x_discrete(labels = c("Duration", "VABS ABC","Verbal DQ","Nonverbal DQ",
"ADOS CSS", "Imitation","Age at Start","Intensity")) +
scale_y_discrete(labels = c("Duration", "VABS ABC","Verbal DQ","Nonverbal DQ",
"ADOS CSS", "Imitation","Age at Start","Intensity")) +
ggtitle("Predictor Variable Correlations") +
easy_center_title() +
theme_minimal() +
theme(axis.text.x = element_text(hjust = 0.95),
axis.text.y = element_text(hjust = 0.95),
plot.title = element_text(hjust = 0.5),
legend.title.align=0.22)
p = p + easy_rotate_x_labels(angle = 90) +
theme(axis.text.y = element_text(hjust = 0.95),
axis.text.x = element_text(hjust = 0.95),
legend.title.align=0.22)
ggsave(filename = file.path(plotpath, "24mo_corrmat.pdf"), plot = p)
p
# run again with PCA on continuous predictors
form2use = as.formula(sprintf("%s ~ sex + tx_broad + PC1 + PC2 + PC3 + PC4 + PC5 + (1 | dataset)",responder_var))
# fit model
result = logistic_regression(data = tidy_data,
formula = form2use,
vars2scale = continuous_predictors,
responder_var = responder_var,
do_pca = TRUE,
pcavars = pcavars,
recon_pcs = c("PC1","PC2"))
## Generalized linear mixed model fit by maximum likelihood (Adaptive
## Gauss-Hermite Quadrature, nAGQ = 10) [glmerMod]
## Family: binomial ( logit )
## Formula: responder24 ~ sex + tx_broad + PC1 + PC2 + PC3 + PC4 + PC5 +
## (1 | dataset)
## Data: scaled_data2use
## AIC BIC logLik deviance df.resid
## 181.4995 215.9933 -79.7498 159.4995 159
## Random effects:
## Groups Name Std.Dev.
## dataset (Intercept) 0
## Number of obs: 170, groups: dataset, 8
## Fixed Effects:
## (Intercept) sexMale tx_broadEIBI tx_broadNDBI tx_broadESDM
## -0.48192 0.44672 0.63065 -0.86941 0.69720
## PC1 PC2 PC3 PC4 PC5
## -0.88842 0.82000 0.20767 0.05982 0.31363
## optimizer (bobyqa) convergence code: 0 (OK) ; 0 optimizer warnings; 1 lme4 warnings
## beta ci.low95 ci.high85 OR ci.OR.low95
## (Intercept) -0.48191892 -1.9385590 0.9747212 0.6175971 0.14391117
## sexMale 0.44672117 -0.4826240 1.3760664 1.5631784 0.61716183
## tx_broadEIBI 0.63065316 -2.3939641 3.6552705 1.8788374 0.09126717
## tx_broadNDBI -0.86940788 -2.8842893 1.1454735 0.4191997 0.05589450
## tx_broadESDM 0.69719551 -0.9272554 2.3216464 2.0081131 0.39563809
## PC1 -0.88842397 -1.3130665 -0.4637814 0.4113035 0.26899391
## PC2 0.82000104 0.3383907 1.3016114 2.2705022 1.40268845
## PC3 0.20766558 -0.3003949 0.7157260 1.2308015 0.74052573
## PC4 0.05981975 -0.3747797 0.4944192 1.0616452 0.68744072
## PC5 0.31363182 -0.3214205 0.9486841 1.3683858 0.72511830
## ci.OR.high95 pval fdr
## (Intercept) 2.650428 5.166927e-01 0.6458658597
## sexMale 3.959296 3.461208e-01 0.6043629479
## tx_broadEIBI 38.677981 6.827795e-01 0.7586438491
## tx_broadNDBI 3.143930 3.977051e-01 0.6043629479
## tx_broadESDM 10.192442 4.002307e-01 0.6043629479
## PC1 0.628901 4.119892e-05 0.0004119892
## PC2 3.675214 8.464469e-04 0.0042322345
## PC3 2.045671 4.230541e-01 0.6043629479
## PC4 1.639546 7.873287e-01 0.7873286743
## PC5 2.582309 3.330540e-01 0.6043629479
# pairwise comparisons of tx_broad
pairwise_res = emmeans(result$model, pairwise ~ tx_broad)
pairwise_res
## $emmeans
## tx_broad emmean SE df asymp.LCL asymp.UCL
## Other -0.176 0.661 Inf -1.471 1.1197
## EIBI 0.455 1.442 Inf -2.372 3.2816
## NDBI -1.045 0.519 Inf -2.062 -0.0285
## ESDM 0.521 0.459 Inf -0.377 1.4204
##
## Results are averaged over the levels of: sex
## Results are given on the logit (not the response) scale.
## Confidence level used: 0.95
##
## $contrasts
## contrast estimate SE df z.ratio p.value
## Other - EIBI -0.6307 1.543 Inf -0.409 0.9770
## Other - NDBI 0.8694 1.028 Inf 0.846 0.8326
## Other - ESDM -0.6972 0.829 Inf -0.841 0.8348
## EIBI - NDBI 1.5001 1.555 Inf 0.965 0.7695
## EIBI - ESDM -0.0665 1.497 Inf -0.044 1.0000
## NDBI - ESDM -1.5666 0.676 Inf -2.317 0.0942
##
## Results are averaged over the levels of: sex
## Results are given on the log odds ratio (not the response) scale.
## P value adjustment: tukey method for comparing a family of 4 estimates
summary(result$pca_res)
## Importance of components:
## PC1 PC2 PC3 PC4 PC5 PC6 PC7
## Standard deviation 1.8636 1.1845 1.0025 0.9368 0.7359 0.60643 0.46491
## Proportion of Variance 0.4341 0.1754 0.1256 0.1097 0.0677 0.04597 0.02702
## Cumulative Proportion 0.4341 0.6095 0.7351 0.8448 0.9125 0.95849 0.98551
## PC8
## Standard deviation 0.34044
## Proportion of Variance 0.01449
## Cumulative Proportion 1.00000
data2plot = result$pca_res$rotation[hc$order,]
data2plot = melt(data2plot)
p = ggplot(data = data2plot, aes(x = Var2, y = Var1, fill = value, label = round(value,2))) +
geom_tile() +
geom_text() +
labs(x = NULL, y = NULL, fill = "Loading") +
scale_fill_gradientn(colours = colorRampPalette(c("blue","white","red"))(100),
limits = c(-0.5,0.5),
oob = scales::squish) +
scale_x_discrete(labels = colnames(result$pca_res$rotation)) +
scale_y_discrete(labels = rev(c("Intensity","Age at Start","Imitation","ADOS CSS",
"Nonverbal DQ","Verbal DQ","VABS ABC","Duration"))) +
ggtitle("PCA Loadings") +
easy_center_title() +
theme_minimal() +
theme(axis.text.y = element_text(hjust = 0.95),
plot.title = element_text(hjust = 0.5))
ggsave(filename = file.path(plotpath, "24mo_pcaloadings.pdf"), plot = p)
p
# plot screeplot
pca_res = prcomp(data2use[,pcavars], scale=TRUE)
data2plot = data.frame(comps = colnames(pca_res$rotation),
percent_var = ((pca_res$sdev^2) / sum(pca_res$sdev^2)) * 100)
p = ggplot(data = data2plot, aes(x = comps, y = percent_var)) +
geom_point() +
geom_line(aes(group=1)) +
xlab("Components") + ylab("Percentage Variance Explained") +
theme_bw() +
theme(plot.title = element_text(hjust = 0.5, size=fontSize),
strip.text.y = element_text(size=fontSize),
axis.text.x = element_text(size=fontSize),
axis.text.y = element_text(size=fontSize-2),
axis.title.x = element_blank(),
axis.title.y = element_text(size=fontSize),
strip.text = element_text(size = fontSize))
ggsave(filename = file.path(plotpath,"24mo_screeplot.pdf"), plot = p)
p
# Plot the PCs by responder status
data2plot = result$data
vars2plot = c("PC1","PC2")
for (var2plot in vars2plot){
p = ggplot(data = data2plot, aes(x = .data[[responder_var]],
y = .data[[var2plot]],
colour = .data[[responder_var]])) +
geom_scatterbox() + ggtitle(var2plot) + easy_center_title()
print(p)
d_res = cohens_d(x = data2plot[data2plot[,responder_var]=="Responder", var2plot],
y = data2plot[data2plot[,responder_var]=="Non-Responder", var2plot])
print(sprintf("Effect size for %s is d = %f", var2plot, d_res))
}
## [1] "Effect size for PC1 is d = -0.929401"
## [1] "Effect size for PC2 is d = 0.653847"
# plot recon data retaining PC1 and PC2
tmp_res = logistic_regression(data = tidy_data,
formula = form2use,
vars2scale = continuous_predictors,
responder_var = responder_var,
do_pca = TRUE,
pcavars = pcavars,
recon_pcs = c("PC1","PC2"),
print_output = FALSE)
data2plot = tmp_res$pca_recon_data
vars2plot = c("age_at_start","tx_duration","intensity",
"pre_imitation","pre_vabs_abc","pre_ados_css",
"pretx_verbal_dq","pretx_nonverbal_dq")
varnames2use = c("Age at Start","Treatment Duration","Treatment Intensity",
"Imitation","VABS ABC","ADOS CSS",
"Verbal DQ","Nonverbal DQ")
i = 0
plots2save = list()
for (var2plot in vars2plot){
i = i+1
p = ggplot(data = data2plot, aes(x = .data[[responder_var]],
y = .data[[var2plot]],
colour = .data[[responder_var]])) +
# geom_scatterbox() +
geom_jitter(size=10) +
geom_boxplot(fill = NA, colour = "#000000", outlier.shape = NA) +
ylab(varnames2use[i]) +
scale_colour_manual(values=cols2use) +
scale_x_discrete(labels = c("Non-Advancer", "Advancer")) +
guides(colour="none") +
ggtitle(varnames2use[i]) + easy_center_title() +
easy_remove_x_axis(what = "title") +
theme_bw() +
theme(plot.title = element_text(hjust = 0.5, size=fontSize+8),
strip.text.y = element_text(size=fontSize),
axis.text.x = element_text(size=fontSize),
axis.text.y = element_text(size=fontSize-2),
axis.title.x = element_blank(),
axis.title.y = element_text(size=fontSize),
strip.text = element_text(size = fontSize))
ggsave(filename = file.path(plotpath,sprintf("%s_24mo_reconplot.pdf",var2plot)), plot = p)
plots2save[[i]] = p
print(p)
d_res = cohens_d(x = data2plot[data2plot[,responder_var]=="Responder", var2plot],
y = data2plot[data2plot[,responder_var]=="Non-Responder", var2plot])
print(sprintf("Effect size for %s is d = %f", var2plot, d_res))
}
## [1] "Effect size for age_at_start is d = -0.489102"
## [1] "Effect size for tx_duration is d = 0.504642"
## [1] "Effect size for intensity is d = -0.354117"
## [1] "Effect size for pre_imitation is d = 0.725119"
## [1] "Effect size for pre_vabs_abc is d = 1.124856"
## [1] "Effect size for pre_ados_css is d = -1.092412"
## [1] "Effect size for pretx_verbal_dq is d = 1.135789"
## [1] "Effect size for pretx_nonverbal_dq is d = 0.934539"
p_final = plots2save[[1]] + plots2save[[3]] + plots2save[[6]] + plots2save[[4]] + plots2save[[5]] + plots2save[[8]] + plots2save[[7]] + plots2save[[2]] + plot_layout(nrow = 2, ncol = 4)
ggsave(filename = file.path(plotpath,"24mo_reconplot.pdf"), plot = p_final,
width = 24, height=15)
p_final
IVs: sex + tx_broad + age_at_start + intensity + pre_imitation + pre_vabs_abc + pre_ados_css + pretx_verbal_dq + pretx_nonverbal_dq
RFX: (1 | dataset)
responder_var = "responder15"
# Responder Status x Treatment Type (Broad)
tmp_tbl = table(tidy_data[,responder_var],tidy_data$tx_broad)
print(tmp_tbl)
##
## Other EIBI NDBI ESDM
## Non-Responder 19 15 27 41
## Responder 17 38 65 84
max2use = max(tmp_tbl)
data2plot = data.frame(t(tmp_tbl))
# cols2use = get_ggColorHue(4)
tx_label2use = "ESDM"
p_esdm = ggplot(data = data2plot[data2plot$Var1==tx_label2use,], aes(x=Var2,y=Freq)) +
geom_bar(stat="identity",aes(fill = Var2)) +
xlab("Responder Type") +
ylab("Sample Size") + ylim(0,max2use) +
scale_fill_manual(values=cols2use) +
scale_x_discrete(labels = c("Non-Advancer", "Advancer")) +
theme_bw() +
guides(fill="none") +
ggtitle(tx_label2use) +
easy_center_title() +
theme(plot.title = element_text(hjust = 0.5, size=fontSize+8),
strip.text.y = element_text(size=fontSize),
axis.text.x = element_text(size=fontSize),
axis.text.y = element_text(size=fontSize-2),
axis.title.x = element_blank(),
axis.title.y = element_text(size=fontSize),
strip.text = element_text(size = fontSize))
tx_label2use = "EIBI"
p_eibi = ggplot(data = data2plot[data2plot$Var1==tx_label2use,], aes(x=Var2,y=Freq)) +
geom_bar(stat="identity",aes(fill = Var2)) +
xlab("Responder Type") +
ylab("Sample Size") + ylim(0,max2use) +
scale_fill_manual(values=cols2use) +
scale_x_discrete(labels = c("Non-Advancer", "Advancer")) +
theme_bw() +
guides(fill="none") +
ggtitle(tx_label2use) +
easy_center_title() +
theme(plot.title = element_text(hjust = 0.5, size=fontSize+8),
strip.text.y = element_text(size=fontSize),
axis.text.x = element_text(size=fontSize),
axis.text.y = element_text(size=fontSize-2),
axis.title.x = element_blank(),
axis.title.y = element_text(size=fontSize),
strip.text = element_text(size = fontSize))
tx_label2use = "NDBI"
p_ndbi = ggplot(data = data2plot[data2plot$Var1==tx_label2use,], aes(x=Var2,y=Freq)) +
geom_bar(stat="identity",aes(fill = Var2)) +
xlab("Responder Type") +
ylab("Sample Size") + ylim(0,max2use) +
scale_fill_manual(values=cols2use) +
scale_x_discrete(labels = c("Non-Advancer", "Advancer")) +
theme_bw() +
guides(fill="none") +
ggtitle(tx_label2use) +
easy_center_title() +
theme(plot.title = element_text(hjust = 0.5, size=fontSize+8),
strip.text.y = element_text(size=fontSize),
axis.text.x = element_text(size=fontSize),
axis.text.y = element_text(size=fontSize-2),
axis.title.x = element_blank(),
axis.title.y = element_text(size=fontSize),
strip.text = element_text(size = fontSize))
tx_label2use = "Other"
p_other = ggplot(data = data2plot[data2plot$Var1==tx_label2use,], aes(x=Var2,y=Freq)) +
geom_bar(stat="identity",aes(fill = Var2)) +
xlab("Responder Type") +
ylab("Sample Size") + ylim(0,max2use) +
scale_fill_manual(values=cols2use) +
scale_x_discrete(labels = c("Non-Advancer", "Advancer")) +
theme_bw() +
guides(fill="none") +
ggtitle("Classroom-Based") +
easy_center_title() +
theme(plot.title = element_text(hjust = 0.5, size=fontSize+8),
strip.text.y = element_text(size=fontSize),
axis.text.x = element_text(size=fontSize),
axis.text.y = element_text(size=fontSize-2),
axis.title.x = element_blank(),
axis.title.y = element_text(size=fontSize),
strip.text = element_text(size = fontSize))
p_final = p_esdm + p_ndbi + p_eibi + p_other + plot_layout(nrow=2,ncol=2)
ggsave(filename = file.path(plotpath, "tx_sample_size_15mo.pdf"), plot = p_final,
width = 16, height = 10)
p_final
# Correlation matrix after using only subjects with complete responder18 labels
mask = rowSums(is.na(tidy_data[,c(responder_var,continuous_predictors)]))==0
data2use = tidy_data %>% filter(mask)
corr_mat = cor(data2use[,continuous_predictors], use="pairwise.complete.obs")
dist = as.dist((1-corr_mat)/2)
hc = hclust(dist)
corr_mat = corr_mat[hc$order, hc$order]
data2plot = melt(corr_mat)
# colnames(data2plot)[3] = "r"
p = ggplot(data = data2plot, aes(x = Var1, y = Var2, fill = value, label = round(value,2))) +
geom_tile() +
geom_text() +
labs(x = NULL, y = NULL, fill = "r") +
scale_fill_gradientn(colours = colorRampPalette(c("blue","white","red"))(100),
limits = c(-1,1),
oob = scales::squish) +
scale_x_discrete(labels = c("Imitation", "Duration","VABS ABC","Verbal DQ",
"Nonverbal DQ", "ADOS CSS","Age at Start","Intensity")) +
scale_y_discrete(labels = c("Imitation", "Duration","VABS ABC","Verbal DQ",
"Nonverbal DQ", "ADOS CSS","Age at Start","Intensity")) +
ggtitle("Predictor Variable Correlations") +
easy_center_title() +
theme_minimal() +
theme(axis.text.x = element_text(hjust = 0.95),
axis.text.y = element_text(hjust = 0.95),
plot.title = element_text(hjust = 0.5),
legend.title.align=0.22)
p = p + easy_rotate_x_labels(angle = 90) +
theme(axis.text.y = element_text(hjust = 0.95),
axis.text.x = element_text(hjust = 0.95),
legend.title.align=0.22)
ggsave(filename = file.path(plotpath, "15mo_corrmat.pdf"), plot = p)
p
# run again with PCA on continuous predictors
form2use = as.formula(sprintf("%s ~ sex + tx_broad + PC1 + PC2 + PC3 + PC4 + PC5 + (1 | dataset)",responder_var))
# fit model
result = logistic_regression(data = tidy_data,
formula = form2use,
vars2scale = continuous_predictors,
responder_var = responder_var,
do_pca = TRUE,
pcavars = pcavars,
recon_pcs = c("PC1","PC2"))
## Generalized linear mixed model fit by maximum likelihood (Adaptive
## Gauss-Hermite Quadrature, nAGQ = 10) [glmerMod]
## Family: binomial ( logit )
## Formula: responder15 ~ sex + tx_broad + PC1 + PC2 + PC3 + PC4 + PC5 +
## (1 | dataset)
## Data: scaled_data2use
## AIC BIC logLik deviance df.resid
## 98.2549 128.1584 -38.1275 76.2549 101
## Random effects:
## Groups Name Std.Dev.
## dataset (Intercept) 0
## Number of obs: 112, groups: dataset, 8
## Fixed Effects:
## (Intercept) sexMale tx_broadEIBI tx_broadNDBI tx_broadESDM
## -4.0086 0.7012 6.1246 5.8779 5.1198
## PC1 PC2 PC3 PC4 PC5
## -0.3347 1.9257 0.6276 1.3699 1.1460
## optimizer (bobyqa) convergence code: 0 (OK) ; 0 optimizer warnings; 1 lme4 warnings
## beta ci.low95 ci.high85 OR ci.OR.low95
## (Intercept) -4.0086424 -7.26115953 -0.7561253 0.01815803 0.0007022932
## sexMale 0.7012270 -0.77939627 2.1818503 2.01622514 0.4586828501
## tx_broadEIBI 6.1245863 1.37623285 10.8729397 456.95561043 3.9599557325
## tx_broadNDBI 5.8778524 1.81200071 9.9437040 357.04162755 6.1226848980
## tx_broadESDM 5.1198333 1.68649802 8.5531685 167.30747427 5.4005349918
## PC1 -0.3347430 -0.88043618 0.2109501 0.71552193 0.4146020330
## PC2 1.9257325 1.01139271 2.8400723 6.86017199 2.7494275095
## PC3 0.6275639 -0.22669614 1.4818240 1.87304214 0.7971629744
## PC4 1.3698956 0.44245971 2.2973314 3.93493972 1.5565311286
## PC5 1.1460226 -0.03379945 2.3258446 3.14565636 0.9667653672
## ci.OR.high95 pval fdr
## (Intercept) 0.469482 1.570720e-02 0.0261786702
## sexMale 8.862690 3.532722e-01 0.3532721608
## tx_broadEIBI 52729.990942 1.146902e-02 0.0229380315
## tx_broadNDBI 20820.722596 4.604131e-03 0.0115103274
## tx_broadESDM 5183.151482 3.469249e-03 0.0115103274
## PC1 1.234851 2.292406e-01 0.2547117578
## PC2 17.117003 3.658607e-05 0.0003658607
## PC3 4.400966 1.499036e-01 0.1873794585
## PC4 9.947601 3.790693e-03 0.0115103274
## PC5 10.235321 5.692971e-02 0.0813281626
# pairwise comparisons of tx_broad
pairwise_res = emmeans(result$model, pairwise ~ tx_broad)
pairwise_res
## $emmeans
## tx_broad emmean SE df asymp.LCL asymp.UCL
## Other -3.56 1.502 Inf -6.500 -0.612
## EIBI 2.57 1.745 Inf -0.852 5.990
## NDBI 2.32 0.797 Inf 0.761 3.883
## ESDM 1.56 0.651 Inf 0.287 2.841
##
## Results are averaged over the levels of: sex
## Results are given on the logit (not the response) scale.
## Confidence level used: 0.95
##
## $contrasts
## contrast estimate SE df z.ratio p.value
## Other - EIBI -6.125 2.423 Inf -2.528 0.0557
## Other - NDBI -5.878 2.074 Inf -2.833 0.0238
## Other - ESDM -5.120 1.752 Inf -2.923 0.0182
## EIBI - NDBI 0.247 1.763 Inf 0.140 0.9990
## EIBI - ESDM 1.005 1.804 Inf 0.557 0.9447
## NDBI - ESDM 0.758 0.982 Inf 0.772 0.8672
##
## Results are averaged over the levels of: sex
## Results are given on the log odds ratio (not the response) scale.
## P value adjustment: tukey method for comparing a family of 4 estimates
summary(result$pca_res)
## Importance of components:
## PC1 PC2 PC3 PC4 PC5 PC6 PC7
## Standard deviation 1.8949 1.1039 1.0383 0.9388 0.70242 0.65228 0.44322
## Proportion of Variance 0.4489 0.1523 0.1348 0.1102 0.06167 0.05318 0.02456
## Cumulative Proportion 0.4489 0.6012 0.7359 0.8461 0.90778 0.96097 0.98552
## PC8
## Standard deviation 0.34034
## Proportion of Variance 0.01448
## Cumulative Proportion 1.00000
data2plot = result$pca_res$rotation[hc$order,]
data2plot = melt(data2plot)
p = ggplot(data = data2plot, aes(x = Var2, y = Var1, fill = value, label = round(value,2))) +
geom_tile() +
geom_text() +
labs(x = NULL, y = NULL, fill = "Loading") +
scale_fill_gradientn(colours = colorRampPalette(c("blue","white","red"))(100),
limits = c(-0.5,0.5),
oob = scales::squish) +
scale_x_discrete(labels = colnames(result$pca_res$rotation)) +
scale_y_discrete(labels = rev(c("Intensity","Age at Start","ADOS CSS","Nonverbal DQ",
"Verbal DQ","VABS ABC","Duration","Imitation"))) +
ggtitle("PCA Loadings") +
easy_center_title() +
theme_minimal() +
theme(axis.text.y = element_text(hjust = 0.95),
plot.title = element_text(hjust = 0.5))
ggsave(filename = file.path(plotpath, "15mo_pcaloadings.pdf"), plot = p)
p
# plot screeplot
pca_res = prcomp(data2use[,pcavars], scale=TRUE)
data2plot = data.frame(comps = colnames(pca_res$rotation),
percent_var = ((pca_res$sdev^2) / sum(pca_res$sdev^2)) * 100)
p = ggplot(data = data2plot, aes(x = comps, y = percent_var)) +
geom_point() +
geom_line(aes(group=1)) +
xlab("Components") + ylab("Percentage Variance Explained") +
theme_bw() +
theme(plot.title = element_text(hjust = 0.5, size=fontSize),
strip.text.y = element_text(size=fontSize),
axis.text.x = element_text(size=fontSize),
axis.text.y = element_text(size=fontSize-2),
axis.title.x = element_blank(),
axis.title.y = element_text(size=fontSize),
strip.text = element_text(size = fontSize))
ggsave(filename = file.path(plotpath,"15mo_screeplot.pdf"), plot = p)
p
# Plot the PCs by responder status
data2plot = result$data
vars2plot = c("PC2","PC4")
for (var2plot in vars2plot){
p = ggplot(data = data2plot, aes(x = .data[[responder_var]],
y = .data[[var2plot]],
colour = .data[[responder_var]])) +
geom_scatterbox() + ggtitle(var2plot) + easy_center_title()
print(p)
d_res = cohens_d(x = data2plot[data2plot[,responder_var]=="Responder", var2plot],
y = data2plot[data2plot[,responder_var]=="Non-Responder", var2plot])
print(sprintf("Effect size for %s is d = %f", var2plot, d_res))
}
## [1] "Effect size for PC2 is d = 0.766249"
## [1] "Effect size for PC4 is d = 0.298752"
# plot recon data retaining PC2 and PC4
tmp_res = logistic_regression(data = tidy_data,
formula = form2use,
vars2scale = continuous_predictors,
responder_var = responder_var,
do_pca = TRUE,
pcavars = pcavars,
recon_pcs = c("PC2","PC4"),
print_output = FALSE)
data2plot = tmp_res$pca_recon_data
vars2plot = c("age_at_start","tx_duration","intensity",
"pre_imitation","pre_vabs_abc","pre_ados_css",
"pretx_verbal_dq","pretx_nonverbal_dq")
varnames2use = c("Age at Start","Treatment Duration","Treatment Intensity",
"Imitation","VABS ABC","ADOS CSS",
"Verbal DQ","Nonverbal DQ")
i = 0
plots2save = list()
for (var2plot in vars2plot){
i = i+1
p = ggplot(data = data2plot, aes(x = .data[[responder_var]],
y = .data[[var2plot]],
colour = .data[[responder_var]])) +
# geom_scatterbox() +
geom_jitter(size=10) +
geom_boxplot(fill = NA, colour = "#000000", outlier.shape = NA) +
ylab(varnames2use[i]) +
scale_colour_manual(values=cols2use) +
scale_x_discrete(labels = c("Non-Advancer", "Advancer")) +
guides(colour="none") +
ggtitle(varnames2use[i]) + easy_center_title() +
easy_remove_x_axis(what = "title") +
theme_bw() +
theme(plot.title = element_text(hjust = 0.5, size=fontSize+8),
strip.text.y = element_text(size=fontSize),
axis.text.x = element_text(size=fontSize),
axis.text.y = element_text(size=fontSize-2),
axis.title.x = element_blank(),
axis.title.y = element_text(size=fontSize),
strip.text = element_text(size = fontSize))
ggsave(filename = file.path(plotpath,sprintf("%s_15mo_reconplot.pdf",var2plot)), plot = p)
plots2save[[i]] = p
print(p)
d_res = cohens_d(x = data2plot[data2plot[,responder_var]=="Responder", var2plot],
y = data2plot[data2plot[,responder_var]=="Non-Responder", var2plot])
print(sprintf("Effect size for %s is d = %f", var2plot, d_res))
}
## [1] "Effect size for age_at_start is d = 0.756136"
## [1] "Effect size for tx_duration is d = 0.565083"
## [1] "Effect size for intensity is d = 0.789705"
## [1] "Effect size for pre_imitation is d = 0.794774"
## [1] "Effect size for pre_vabs_abc is d = 0.207615"
## [1] "Effect size for pre_ados_css is d = -0.356252"
## [1] "Effect size for pretx_verbal_dq is d = 0.400548"
## [1] "Effect size for pretx_nonverbal_dq is d = -0.532218"
p_final = plots2save[[1]] + plots2save[[3]] + plots2save[[6]] + plots2save[[4]] + plots2save[[5]] + plots2save[[8]] + plots2save[[7]] + plots2save[[2]] + plot_layout(nrow = 2, ncol = 4)
ggsave(filename = file.path(plotpath,"15mo_reconplot.pdf"), plot = p_final,
width = 24, height=15)
p_final